********************************************************************************
* Authors: Henrik Andersson & Sirus Dehdari
* Description: This dofile creates Figure C3
********************************************************************************

* Start from an empty session and switch off output paging.
clear all
set more off


* 1. For every election year: link the randomised workplace shares to
*    individuals, attach each person's election district, and average the
*    variables within district.

forvalues y = 2006(4)2014 {

    * Randomised set of workplaces for this year.
    use "E:\ProjData\IntegrationSD\temp\random_wpshares_`y'.dta"

    * Bring in the individuals registered at these workplaces (LISA data);
    * only rows present in both files are retained.
    merge 1:m ArbstId using "D:\SCB_ConPol\Stata\LISA\LISA_`y'.dta", keepusing(LopNr ArbstId) keep(match) nogenerate

    * A handful of persons occur more than once (original note: 6 surplus
    * rows in 2006, 43 in 2010 and 13 in 2014 in the full data set).
    * Keep one row per person.
    duplicates drop LopNr, force

    * Election-district data per person, matched rows only.
    * The forward slash in front of the year macro is deliberate: a backslash
    * placed directly before a macro reference stops Stata from expanding it.
    merge 1:1 LopNr using "E:\ProjData\IntegrationSD\temp/`y'_electiondistrictsperperson.dta", keep(match) nogenerate

    * Average every variable whose storage type is not long, by district.
    * NOTE(review): this presumably relies on the identifiers (LopNr and
    * llkkdddd) being stored as long so that they are left out - confirm.
    ds , not(type long)
    local mean_vars `r(varlist)'
    display "`mean_vars'"
    collapse (mean) `mean_vars' , by(llkkdddd)

    * The averaged workplace id has no meaning at district level.
    drop ArbstId
    save "E:\ProjData\IntegrationSD\temp\workplace_random`y'.dta", replace

}

* 2. Having done this, the share computation has to be redone.

* Attach the district-level random shares to the files that map earlier
* precincts onto the 2014 precincts (2010 to 2014 first, then 2006 to 2014).

foreach y in 2010 2006 {

    use "E:\ProjData\IntegrationSD\temp\workplace_random`y'.dta"

    * District code as an eight-character, zero-padded string merge key.
    gen str8 precinct`y' = string(llkkdddd,"%08.0f")
    drop llkkdddd

    * All rows are kept, matched or not.
    merge 1:m precinct`y' using "E:\ProjData\IntegrationSD\temp\vd_`y'_2014_shares", nogenerate

    order precinct`y' whole_pop precinct2014 totbef share
    save "E:\ProjData\IntegrationSD\temp\vd_`y'_2014_shares_r_alt", replace

}


* Now bring everything into one file, beginning with 2010.
clear
cd "E:\"

* Path to the temp folder (it ends with a backslash).
local out_temp "E:\ProjData\IntegrationSD\temp\"

* Name of the variable that sits last in the list of randomised shares.
* The range below runs from share_im_cworker_rand1 to this variable, so the
* random-share variables must be stored next to each other in the data.
local last_var "share_im_cworker_rand1000"


*** 2010 election data: rename the precinct code and restore its leading zero.
use `out_temp'2010_electionresults, clear

rename llkkdddd precinct2010
tostring precinct2010, replace
* Numeric storage dropped the leading zero; put one back on short codes.
replace precinct2010 = "0" + precinct2010 if strlen(precinct2010) < 8

* Merge with the 2010-to-2014 precinct combinations. The extra backslash after
* the macro gives a doubled separator in the expanded path; it is kept as is.
* NOTE(review): unlike the 2006 section, unmatched rows are kept here -
* confirm that the difference is intended.
merge 1:m precinct2010 using `out_temp'\vd_2010_2014_shares_r_alt.dta, nogenerate

* Weight each random share by the combination share ...
foreach v of varlist share_im_cworker_rand1-`last_var' {
    quietly replace `v' = `v' * share
}

* ... and sum the weighted values up to 2014 precincts.
collapse (sum) share_im_cworker_rand1-`last_var', by(precinct2014)

gen year = 2010

save `out_temp'reg_2010_2014_alt, replace



*************************
*** 2006:
clear
cd "E:\"

* Path to the temp folder and name of the last random-share variable.
local out_temp "E:\ProjData\IntegrationSD\temp\"
local last_var "share_im_cworker_rand1000"

* 2006 precinct-level election results, first row holding the variable names.
import excel "D:\Data\ExtData\ValResData\Riksdagsval_2006\Riksdagsval_2006.xls", sheet("riksdagsvalet_vd_2006_orginal") firstrow clear

rename LKFV precinct2006

* Only precincts found in both the election file and the mapping are kept.
merge 1:m precinct2006 using `out_temp'vd_2006_2014_shares_r_alt, keep(match) nogenerate

* Weight each random share by the combination share ...
foreach v of varlist share_im_cworker_rand1-`last_var' {
    quietly replace `v' = `v' * share
}

* ... and sum the weighted values up to 2014 precincts.
collapse (sum) share_im_cworker_rand1-`last_var', by(precinct2014)

gen year = 2006

save `out_temp'reg_2006_2014_alt, replace


* Create the panel: stack the 2014 file with the re-weighted 2010 and 2006
* files and declare the panel structure.

clear all
use "E:\ProjData\IntegrationSD\temp\workplace_random2014.dta"
gen year = 2014

* The 2014 file holds the numeric district code; turn it into the
* zero-padded string key used by the files appended below.
gen str8 precinct2014 = string(llkkdddd,"%08.0f")
drop llkkdddd

append using "E:\ProjData\IntegrationSD\temp\reg_2010_2014_alt.dta"
append using "E:\ProjData\IntegrationSD\temp\reg_2006_2014_alt.dta"
replace year = 2006 if year==.

* Time index: 1 = 2006, 2 = 2010, 3 = 2014.
gen t = cond(year==2006, 1, cond(year==2010, 2, cond(year==2014, 3, .)))

* Panel setup
order precinct2014 year t
destring precinct2014, replace
xtset precinct2014 t
sort precinct2014 t

* Diagnostics kept from the original author (commented out):
*duplicates tag precinct2014, gen(tag)
*bysort year: tab tag
* 5,044 districts in both 2010 and 2014
* 4,712 districts in all years
* 272 only in 2010
* 709 only in 2006
* 659 only in 2014

* Add the main analysis data and store the combined file.
merge 1:1 precinct2014 year using "E:\ProjData\IntegrationSD\use\main.dta", nogenerate
save "E:\ProjData\IntegrationSD\use\randomdata_main_alt.dta", replace




* 3. Analysis: construct the variables used in the regressions.

sort precinct2014 t

* The party vote measures are scaled differently across election years
* (original note); the 2014 values are multiplied by 100.
foreach party in Mproc Cproc FPproc KDproc Sproc Vproc MPproc SDproc FIproc PPproc {
    replace `party' = 100 * `party' if year==2014
}

* Independent variables: put the 1000 randomised shares on a percent scale.
local last_var "share_im_cworker_rand1000"

forvalues i = 1/1000 {
    replace share_im_cworker_rand`i' = 100 * share_im_cworker_rand`i'
}


* Other variables, all expressed in percent.
gen unem_proc_swed = 100 * (unemployed_swedish / swedish)
gen other_share = 100 * (other_im / pop)

foreach lvl in low_educ high_educ {
    gen `lvl'_share = 100 * (`lvl' / pop)
}

gen pop2 = pop^2

gen ctz_other_share = 100 * (ctz_other / other_im)

gen logwage = ln(LoneInk)

* Share of young
gen young_share = 100 * (young / pop)

* Worker data
foreach lvl in other_high_educ other_low_educ {
    gen `lvl'_share = 100 * (`lvl' / other_im)
}


replace share_im_cworker_swe = 100 * share_im_cworker_swe
replace sh_im_cw_noi_swe = 100 * sh_im_cw_noi_swe

* Co-worker shares by region (first seven names) and by type/year group
* (remaining six names), rescaled to percent.
foreach grp in latin mena asia africa rest nordic western newim mediumnewim notnewim oldim highskother lowskother {
    replace share_`grp'_cworker_swe = 100 * share_`grp'_cworker_swe
}

* Sector
replace sector_share_im = 100 * sector_share_im

* Municipality: the first four characters of the zero-padded precinct code.
gen str8 str_dstr = string(precinct2014,"%08.0f")
gen kom_str = substr(str_dstr, 1, 4)
destring kom_str, replace

* Add labour market regions and turn their code into a numeric factor.
merge m:1 kom_str using "E:\ProjData\IntegrationSD\temp\lmr2014", nogenerate
encode Kod, gen(lmr14)
drop Kod


***************
* ANALYSIS *
***************

* Estimate the fixed-effects specification once per randomised treatment
* variable and collect the coefficient together with an approximate 95%
* interval (estimate plus/minus 1.96 standard errors) for plotting.

* Number of randomised draws, i.e. of share_im_cworker_rand variables.
* Defined once so that the standardisation loop, the results matrix and the
* regression loop cannot drift apart.
local n_draws 1000

* Standardise each randomised share (egen std: mean 0, standard deviation 1).
forvalues i = 1/`n_draws' {
    egen std_treat`i' = std(share_im_cworker_rand`i')
}

set matsize 11000

* Results matrix with one row per draw:
*   column 1 = coefficient, 2 = upper bound, 3 = lower bound, 4 = draw index.
* It is sized to the number of draws so that svmat does not write rows that
* are never filled.
matrix A = J(`n_draws', 4, .)

forvalues y = 1/`n_draws' {

    xtreg SDproc std_treat`y' pop pop2 logwage low_educ_share ALosDag ctz_other_share ///
        unem_proc_swed other_share other_low_educ_share wage_cw_wp share_male_cw_wp share_young_cw_wp i.year##i.lmr14, fe vce(cluster precinct2014)

    matrix A[`y',1] = _b[std_treat`y']
    matrix A[`y',2] = _b[std_treat`y'] + 1.96*_se[std_treat`y']
    matrix A[`y',3] = _b[std_treat`y'] - 1.96*_se[std_treat`y']
    matrix A[`y',4] = `y'
}

* Turn the matrix columns into variables A1-A4 and give them plotting names.
svmat A

rename A1 estimate2
rename A2 upper2
rename A3 lower2
rename A4 n

* Plot, for every draw, the interval as a capped range and the point estimate
* as a marker, then export the figure.
graph twoway rcap upper2 lower2 n, lstyle(ci) ///
    || scatter estimate2 n, mstyle(p1) legend(off) ///
       graphregion(color(white)) ytitle(Coefficient Size) xtitle("")
graph export "C:\Userdata\Shared\Output\IntegrationSD\random.pdf", replace


* No log is opened anywhere in this do-file, so a bare log close would stop
* with an error unless a caller had opened one. With capture, an open log is
* still closed and a missing one is ignored.
capture log close
